new202310 rename:
B1: AD:/storage/xuhepingLab/0.share/projects/20230529_SS2_LYN
B2:
GBM:/storage/hedanyangLab/zhaojinghong/projects/25-LYN.20221212.bulk
B3:
EAE:/storage/hedanyangLab/zhaojinghong/projects/31-LYN.20230106.bulk
B4:
cuprizone:/storage/hedanyangLab/zhaojinghong/projects/17-LYN.20221010.bulk
B5: SIMPLE:
/storage/hedanyangLab/zhaojinghong/projects/37-LYN.20230219.bulk
Ctrl as C
X_Spp1_N as N
X_Spp1_P as P
load_mat <- function(dat){
mat_raw <- read.table(dat, header = TRUE, stringsAsFactors = F, sep = "\t")
rownames(mat_raw) <- mat_raw$gene_id
mat_raw <- mat_raw[,2:ncol(mat_raw)]
mat_raw <- as.matrix(mat_raw)
mat_raw <- edgeR::cpm(mat_raw)
mat_raw <- round(mat_raw)
list_pc <- 'I:/Shared_win/genomics/mouse/GRCm38_vM25/gtf_detail/list_pc.lv1_2'
id_pc <- as.vector(unlist(read.table(list_pc)))
mat_pc <- mat_raw[id_pc,]
return(mat_pc)
}
mat_pc <- list(B1=load_mat("../output_new/AD/RNAseq.20230530_SS2_LYN.counts.gene.matrix"),
B2=load_mat("../output_new/GBM/RNAseq.LYN.20221212.counts.gene.matrix"),
B3=load_mat("../output_new/EAE/RNAseq.LYN.20230106.counts.gene.matrix"),
B4=load_mat("../output_new/cuprizone/RNAseq.LYN.20221010.Spp1.counts.gene.matrix"),
B5=load_mat("../output_new/SIMPLE/RNAseq.LYN.20230219.counts.gene.matrix"))
lapply(mat_pc, dim)
## $B1
## [1] 21649 12
##
## $B2
## [1] 21649 12
##
## $B3
## [1] 21649 9
##
## $B4
## [1] 21649 10
##
## $B5
## [1] 21649 12
lapply(mat_pc, colnames)
## $B1
## [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
## [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
##
## $B2
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "GBM_1_Spp1_N"
## [6] "GBM_1_Spp1_P" "GBM_2_Spp1_N" "GBM_2_Spp1_P" "GBM_3_Spp1_N" "GBM_3_Spp1_P"
## [11] "GBM_4_Spp1_N" "GBM_4_Spp1_P"
##
## $B3
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "EAE_1_GFP_N" "EAE_1_GFP_P"
## [6] "EAE_2_GFP_N" "EAE_2_GFP_P" "EAE_3_GFP_N" "EAE_3_GFP_P"
##
## $B4
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "CZ_1_Spp1_N"
## [6] "CZ_1_Spp1_P" "CZ_2_Spp1_N" "CZ_2_Spp1_P" "CZ_3_Spp1_N" "CZ_3_Spp1_P"
##
## $B5
## [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.1.GFP.P"
## [7] "S.2.GFP.N" "S.2.GFP.P" "S.3.GFP.N" "S.3.GFP.P" "S.4.GFP.N" "S.4.GFP.P"
# reorder as C:N:P
#mat_pc$B1 <- mat_pc$B1[,c(grep("Ctrl",colnames(mat_pc$B1)),
# grep("Spp1_N",colnames(mat_pc$B1)),
# grep("Spp1_P",colnames(mat_pc$B1)),)]
mat_pc <- lapply(mat_pc, function(mat){
mat[,c(grep("^Ctrl|^C...GFP.N|B1.C",colnames(mat),perl = T),
grep("_N$|^S...GFP.N|B1.N",colnames(mat),perl = T),
grep("_P$|^S...GFP.P|B1.P",colnames(mat),perl = T))]
})
lapply(mat_pc, dim)
## $B1
## [1] 21649 12
##
## $B2
## [1] 21649 12
##
## $B3
## [1] 21649 9
##
## $B4
## [1] 21649 10
##
## $B5
## [1] 21649 12
lapply(mat_pc, colnames)
## $B1
## [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
## [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
##
## $B2
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "GBM_1_Spp1_N"
## [6] "GBM_2_Spp1_N" "GBM_3_Spp1_N" "GBM_4_Spp1_N" "GBM_1_Spp1_P" "GBM_2_Spp1_P"
## [11] "GBM_3_Spp1_P" "GBM_4_Spp1_P"
##
## $B3
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "EAE_1_GFP_N" "EAE_2_GFP_N"
## [6] "EAE_3_GFP_N" "EAE_1_GFP_P" "EAE_2_GFP_P" "EAE_3_GFP_P"
##
## $B4
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "CZ_1_Spp1_N"
## [6] "CZ_2_Spp1_N" "CZ_3_Spp1_N" "CZ_1_Spp1_P" "CZ_2_Spp1_P" "CZ_3_Spp1_P"
##
## $B5
## [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.2.GFP.N"
## [7] "S.3.GFP.N" "S.4.GFP.N" "S.1.GFP.P" "S.2.GFP.P" "S.3.GFP.P" "S.4.GFP.P"
names(mat_pc)
## [1] "B1" "B2" "B3" "B4" "B5"
# rename
for(NN in names(mat_pc)){
colnames(mat_pc[[NN]]) <- c(paste0(paste0(NN,".C."),1:length(grep("^Ctrl|^C...GFP.N|B1.C",colnames(mat_pc[[NN]]),perl = T))),
paste0(paste0(NN,".N."),1:length(grep("_N$|^S...GFP.N|B1.N",colnames(mat_pc[[NN]]),perl = T))),
paste0(paste0(NN,".P."),1:length(grep("_P$|^S...GFP.P|B1.P",colnames(mat_pc[[NN]]),perl = T))))
}
lapply(mat_pc, colnames)
## $B1
## [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
## [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
##
## $B2
## [1] "B2.C.1" "B2.C.2" "B2.C.3" "B2.C.4" "B2.N.1" "B2.N.2" "B2.N.3" "B2.N.4"
## [9] "B2.P.1" "B2.P.2" "B2.P.3" "B2.P.4"
##
## $B3
## [1] "B3.C.1" "B3.C.2" "B3.C.3" "B3.N.1" "B3.N.2" "B3.N.3" "B3.P.1" "B3.P.2"
## [9] "B3.P.3"
##
## $B4
## [1] "B4.C.1" "B4.C.2" "B4.C.3" "B4.C.4" "B4.N.1" "B4.N.2" "B4.N.3" "B4.P.1"
## [9] "B4.P.2" "B4.P.3"
##
## $B5
## [1] "B5.C.1" "B5.C.2" "B5.C.3" "B5.C.4" "B5.N.1" "B5.N.2" "B5.N.3" "B5.N.4"
## [9] "B5.P.1" "B5.P.2" "B5.P.3" "B5.P.4"
cowplot::plot_grid(
plotlist = lapply(mat_pc,function(mat){
ggplot(reshape2::melt(data.frame(mat)), aes(x=variable, y=log2(value+1))) + geom_point() +
geom_boxplot() +
stat_summary(fun=mean, geom="point", shape=18, size=3, color="red") +
theme_classic() +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(title = "data distribution - CPM", x="")
}), ncol = 2)
pp.PCA <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848")){
rv <- rowVars(mat)
selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
pca_d <- as.data.frame(pca2$x)
pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][1]})))
pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][3]})))
ggplot(data=pca_d, aes(x=PC1, y=PC2, color=condition)) +
geom_point(size=3.5) +
ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=2.5, max.overlaps = 500) + labs(title = Name) +
scale_colour_manual(values = Color) + guides(color=guide_legend(reverse = F)) + theme_classic()
}
cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA, Name="PCA of CPM")[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA, Name="PCA of CPM")[3:5],
ncol = 3),
ncol = 1)
pp.PCA(cbind(mat_pc$B1,
mat_pc$B2,
mat_pc$B3,
mat_pc$B4,
mat_pc$B5
),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of CPM")
pp.PCA(cbind(log2(mat_pc$B1+1),
log2(mat_pc$B2+1),
log2(mat_pc$B3+1),
log2(mat_pc$B4+1),
log2(mat_pc$B5+1)
),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM+1)")
lapply(mat_pc, colnames)[[1]]
## [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
## [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=lapply(mat_pc, colnames)[[1]],perl = T, fixed = F)
## [1] "B1.C" "B1.C" "B1.C" "B1.C" "B1.N" "B1.N" "B1.N" "B1.N" "B1.P" "B1.P"
## [11] "B1.P" "B1.P"
mat_pc.combine <- cbind(mat_pc$B1,
mat_pc$B2,
mat_pc$B3,
mat_pc$B4,
mat_pc$B5
)
cnt.df <- data.frame(row.names = colnames(mat_pc.combine),
batch = as.vector(unlist(sapply(colnames(mat_pc.combine), function(x){strsplit(x,split = "[.]")[[1]][1]}))),
condition1 = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat_pc.combine),perl = T, fixed = F))
cnt.df$condition2 <- cnt.df$condition1
cnt.df$condition2[grep("[.]C",cnt.df$condition2)] <- "Ctrl"
cnt.df
## batch condition1 condition2
## B1.C.1 B1 B1.C Ctrl
## B1.C.2 B1 B1.C Ctrl
## B1.C.3 B1 B1.C Ctrl
## B1.C.4 B1 B1.C Ctrl
## B1.N.1 B1 B1.N B1.N
## B1.N.2 B1 B1.N B1.N
## B1.N.3 B1 B1.N B1.N
## B1.N.4 B1 B1.N B1.N
## B1.P.1 B1 B1.P B1.P
## B1.P.2 B1 B1.P B1.P
## B1.P.3 B1 B1.P B1.P
## B1.P.4 B1 B1.P B1.P
## B2.C.1 B2 B2.C Ctrl
## B2.C.2 B2 B2.C Ctrl
## B2.C.3 B2 B2.C Ctrl
## B2.C.4 B2 B2.C Ctrl
## B2.N.1 B2 B2.N B2.N
## B2.N.2 B2 B2.N B2.N
## B2.N.3 B2 B2.N B2.N
## B2.N.4 B2 B2.N B2.N
## B2.P.1 B2 B2.P B2.P
## B2.P.2 B2 B2.P B2.P
## B2.P.3 B2 B2.P B2.P
## B2.P.4 B2 B2.P B2.P
## B3.C.1 B3 B3.C Ctrl
## B3.C.2 B3 B3.C Ctrl
## B3.C.3 B3 B3.C Ctrl
## B3.N.1 B3 B3.N B3.N
## B3.N.2 B3 B3.N B3.N
## B3.N.3 B3 B3.N B3.N
## B3.P.1 B3 B3.P B3.P
## B3.P.2 B3 B3.P B3.P
## B3.P.3 B3 B3.P B3.P
## B4.C.1 B4 B4.C Ctrl
## B4.C.2 B4 B4.C Ctrl
## B4.C.3 B4 B4.C Ctrl
## B4.C.4 B4 B4.C Ctrl
## B4.N.1 B4 B4.N B4.N
## B4.N.2 B4 B4.N B4.N
## B4.N.3 B4 B4.N B4.N
## B4.P.1 B4 B4.P B4.P
## B4.P.2 B4 B4.P B4.P
## B4.P.3 B4 B4.P B4.P
## B5.C.1 B5 B5.C Ctrl
## B5.C.2 B5 B5.C Ctrl
## B5.C.3 B5 B5.C Ctrl
## B5.C.4 B5 B5.C Ctrl
## B5.N.1 B5 B5.N B5.N
## B5.N.2 B5 B5.N B5.N
## B5.N.3 B5 B5.N B5.N
## B5.N.4 B5 B5.N B5.N
## B5.P.1 B5 B5.P B5.P
## B5.P.2 B5 B5.P B5.P
## B5.P.3 B5 B5.P B5.P
## B5.P.4 B5 B5.P B5.P
mat_pc.combine.crt <- sva::ComBat_seq(counts = as.matrix(mat_pc.combine),
batch = cnt.df$batch,
group = cnt.df$condition2)
## Found 5 batches
## Using full model in ComBat-seq.
## Adjusting for 10 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data
pp.PCA(mat_pc.combine.crt,Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of CPM.crt")
pp.PCA(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)")
load_matt <- function(dat){
matt_raw <- read.table(dat, header = TRUE, stringsAsFactors = F, sep = "\t")
rownames(matt_raw) <- matt_raw$gene_id
matt_raw <- matt_raw[,2:ncol(matt_raw)]
matt_raw <- as.matrix(matt_raw)
#matt_raw <- edgeR::cpm(matt_raw)
#matt_raw <- round(matt_raw)
list_pc <- 'I:/Shared_win/genomics/mouse/GRCm38_vM25/gtf_detail/list_pc.lv1_2'
id_pc <- as.vector(unlist(read.table(list_pc)))
matt_pc <- matt_raw[id_pc,]
return(matt_pc)
}
matt_pc <- list(B1=load_matt("../output_new/AD/RNAseq.20230530_SS2_LYN.tpm.gene.matrix"),
B2=load_matt("../output_new/GBM/RNAseq.LYN.20221212.tpm.gene.matrix"),
B3=load_matt("../output_new/EAE/RNAseq.LYN.20230106.tpm.gene.matrix"),
B4=load_matt("../output_new/cuprizone/RNAseq.LYN.20221010.Spp1.tpm.gene.matrix"),
B5=load_matt("../output_new/SIMPLE/RNAseq.LYN.20230219.tpm.gene.matrix"))
lapply(matt_pc, dim)
## $B1
## [1] 21649 12
##
## $B2
## [1] 21649 12
##
## $B3
## [1] 21649 9
##
## $B4
## [1] 21649 10
##
## $B5
## [1] 21649 12
lapply(matt_pc, colnames)
## $B1
## [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
## [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
##
## $B2
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "GBM_1_Spp1_N"
## [6] "GBM_1_Spp1_P" "GBM_2_Spp1_N" "GBM_2_Spp1_P" "GBM_3_Spp1_N" "GBM_3_Spp1_P"
## [11] "GBM_4_Spp1_N" "GBM_4_Spp1_P"
##
## $B3
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "EAE_1_GFP_N" "EAE_1_GFP_P"
## [6] "EAE_2_GFP_N" "EAE_2_GFP_P" "EAE_3_GFP_N" "EAE_3_GFP_P"
##
## $B4
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "CZ_1_Spp1_N"
## [6] "CZ_1_Spp1_P" "CZ_2_Spp1_N" "CZ_2_Spp1_P" "CZ_3_Spp1_N" "CZ_3_Spp1_P"
##
## $B5
## [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.1.GFP.P"
## [7] "S.2.GFP.N" "S.2.GFP.P" "S.3.GFP.N" "S.3.GFP.P" "S.4.GFP.N" "S.4.GFP.P"
# reorder as C:N:P
#matt_pc$B1 <- matt_pc$B1[,c(grep("Ctrl",colnames(matt_pc$B1)),
# grep("Spp1_N",colnames(matt_pc$B1)),
# grep("Spp1_P",colnames(matt_pc$B1)),)]
matt_pc <- lapply(matt_pc, function(matt){
matt[,c(grep("^Ctrl|^C...GFP.N|B1.C",colnames(matt),perl = T),
grep("_N$|^S...GFP.N|B1.N",colnames(matt),perl = T),
grep("_P$|^S...GFP.P|B1.P",colnames(matt),perl = T))]
})
lapply(matt_pc, dim)
## $B1
## [1] 21649 12
##
## $B2
## [1] 21649 12
##
## $B3
## [1] 21649 9
##
## $B4
## [1] 21649 10
##
## $B5
## [1] 21649 12
lapply(matt_pc, colnames)
## $B1
## [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
## [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
##
## $B2
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "GBM_1_Spp1_N"
## [6] "GBM_2_Spp1_N" "GBM_3_Spp1_N" "GBM_4_Spp1_N" "GBM_1_Spp1_P" "GBM_2_Spp1_P"
## [11] "GBM_3_Spp1_P" "GBM_4_Spp1_P"
##
## $B3
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "EAE_1_GFP_N" "EAE_2_GFP_N"
## [6] "EAE_3_GFP_N" "EAE_1_GFP_P" "EAE_2_GFP_P" "EAE_3_GFP_P"
##
## $B4
## [1] "Ctrl_1" "Ctrl_2" "Ctrl_3" "Ctrl_4" "CZ_1_Spp1_N"
## [6] "CZ_2_Spp1_N" "CZ_3_Spp1_N" "CZ_1_Spp1_P" "CZ_2_Spp1_P" "CZ_3_Spp1_P"
##
## $B5
## [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.2.GFP.N"
## [7] "S.3.GFP.N" "S.4.GFP.N" "S.1.GFP.P" "S.2.GFP.P" "S.3.GFP.P" "S.4.GFP.P"
names(matt_pc)
## [1] "B1" "B2" "B3" "B4" "B5"
# rename
for(NN in names(matt_pc)){
colnames(matt_pc[[NN]]) <- c(paste0(paste0(NN,".C."),1:length(grep("^Ctrl|^C...GFP.N|B1.C",colnames(matt_pc[[NN]]),perl = T))),
paste0(paste0(NN,".N."),1:length(grep("_N$|^S...GFP.N|B1.N",colnames(matt_pc[[NN]]),perl = T))),
paste0(paste0(NN,".P."),1:length(grep("_P$|^S...GFP.P|B1.P",colnames(matt_pc[[NN]]),perl = T))))
}
lapply(matt_pc, colnames)
## $B1
## [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
## [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
##
## $B2
## [1] "B2.C.1" "B2.C.2" "B2.C.3" "B2.C.4" "B2.N.1" "B2.N.2" "B2.N.3" "B2.N.4"
## [9] "B2.P.1" "B2.P.2" "B2.P.3" "B2.P.4"
##
## $B3
## [1] "B3.C.1" "B3.C.2" "B3.C.3" "B3.N.1" "B3.N.2" "B3.N.3" "B3.P.1" "B3.P.2"
## [9] "B3.P.3"
##
## $B4
## [1] "B4.C.1" "B4.C.2" "B4.C.3" "B4.C.4" "B4.N.1" "B4.N.2" "B4.N.3" "B4.P.1"
## [9] "B4.P.2" "B4.P.3"
##
## $B5
## [1] "B5.C.1" "B5.C.2" "B5.C.3" "B5.C.4" "B5.N.1" "B5.N.2" "B5.N.3" "B5.N.4"
## [9] "B5.P.1" "B5.P.2" "B5.P.3" "B5.P.4"
cowplot::plot_grid(
plotlist = lapply(matt_pc,function(matt){
ggplot(reshape2::melt(data.frame(matt)), aes(x=variable, y=log2(value+1))) + geom_point() +
geom_boxplot() +
stat_summary(fun=mean, geom="point", shape=18, size=3, color="red") +
theme_classic() +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(title = "data distribution - TPM", x="")
}), ncol = 2)
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA, Name="PCA of TPM"),
ncol = 2)
pp.PCA(cbind(matt_pc$B1,
matt_pc$B2,
matt_pc$B3,
matt_pc$B4,
matt_pc$B5
),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of TPM")
pp.PCA(cbind(log2(matt_pc$B1+1),
log2(matt_pc$B2+1),
log2(matt_pc$B3+1),
log2(matt_pc$B4+1),
log2(matt_pc$B5+1)
),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM+1)")
# check X/Y-link genes (only Y- remain in protein-coding list)
pheatmap::pheatmap(cbind(log2(matt_pc$B1+1),
log2(matt_pc$B2+1),
log2(matt_pc$B3+1),
log2(matt_pc$B4+1),
log2(matt_pc$B5+1)
)[c("Actb","Uty","Ddx3y","Eif2s3y","Kdm5d"),],
gaps_col = c(4,8,12,16,20,24,27,30,33,37,40,43,47,51),
cluster_cols = F, cluster_rows = F)
seems like X-link genes not in protein coding list, anyway Y- seems enough here
#c("Xist","Tsix") %in% id_pc
matt_pc.combine <- cbind(matt_pc$B1,
matt_pc$B2,
matt_pc$B3,
matt_pc$B4,
matt_pc$B5
)
cnt.df <- data.frame(row.names = colnames(matt_pc.combine),
batch = as.vector(unlist(sapply(colnames(matt_pc.combine), function(x){strsplit(x,split = "[.]")[[1]][1]}))),
condition1 = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(matt_pc.combine),perl = T, fixed = F))
cnt.df$condition2 <- cnt.df$condition1
cnt.df$condition2[grep("[.]C",cnt.df$condition2)] <- "Ctrl"
cnt.df
## batch condition1 condition2
## B1.C.1 B1 B1.C Ctrl
## B1.C.2 B1 B1.C Ctrl
## B1.C.3 B1 B1.C Ctrl
## B1.C.4 B1 B1.C Ctrl
## B1.N.1 B1 B1.N B1.N
## B1.N.2 B1 B1.N B1.N
## B1.N.3 B1 B1.N B1.N
## B1.N.4 B1 B1.N B1.N
## B1.P.1 B1 B1.P B1.P
## B1.P.2 B1 B1.P B1.P
## B1.P.3 B1 B1.P B1.P
## B1.P.4 B1 B1.P B1.P
## B2.C.1 B2 B2.C Ctrl
## B2.C.2 B2 B2.C Ctrl
## B2.C.3 B2 B2.C Ctrl
## B2.C.4 B2 B2.C Ctrl
## B2.N.1 B2 B2.N B2.N
## B2.N.2 B2 B2.N B2.N
## B2.N.3 B2 B2.N B2.N
## B2.N.4 B2 B2.N B2.N
## B2.P.1 B2 B2.P B2.P
## B2.P.2 B2 B2.P B2.P
## B2.P.3 B2 B2.P B2.P
## B2.P.4 B2 B2.P B2.P
## B3.C.1 B3 B3.C Ctrl
## B3.C.2 B3 B3.C Ctrl
## B3.C.3 B3 B3.C Ctrl
## B3.N.1 B3 B3.N B3.N
## B3.N.2 B3 B3.N B3.N
## B3.N.3 B3 B3.N B3.N
## B3.P.1 B3 B3.P B3.P
## B3.P.2 B3 B3.P B3.P
## B3.P.3 B3 B3.P B3.P
## B4.C.1 B4 B4.C Ctrl
## B4.C.2 B4 B4.C Ctrl
## B4.C.3 B4 B4.C Ctrl
## B4.C.4 B4 B4.C Ctrl
## B4.N.1 B4 B4.N B4.N
## B4.N.2 B4 B4.N B4.N
## B4.N.3 B4 B4.N B4.N
## B4.P.1 B4 B4.P B4.P
## B4.P.2 B4 B4.P B4.P
## B4.P.3 B4 B4.P B4.P
## B5.C.1 B5 B5.C Ctrl
## B5.C.2 B5 B5.C Ctrl
## B5.C.3 B5 B5.C Ctrl
## B5.C.4 B5 B5.C Ctrl
## B5.N.1 B5 B5.N B5.N
## B5.N.2 B5 B5.N B5.N
## B5.N.3 B5 B5.N B5.N
## B5.N.4 B5 B5.N B5.N
## B5.P.1 B5 B5.P B5.P
## B5.P.2 B5 B5.P B5.P
## B5.P.3 B5 B5.P B5.P
## B5.P.4 B5 B5.P B5.P
matt_pc.combine.crt <- sva::ComBat_seq(counts = as.matrix(matt_pc.combine),
batch = cnt.df$batch,
group = cnt.df$condition2)
## Found 5 batches
## Using full model in ComBat-seq.
## Adjusting for 10 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data
pp.PCA(matt_pc.combine.crt,Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of TPM.crt")
pp.PCA(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)")
using log2(correctedCPM/TPM + 1) for downstream modeling
head(matt_pc.combine.crt)
## B1.C.1 B1.C.2 B1.C.3 B1.C.4 B1.N.1 B1.N.2 B1.N.3 B1.N.4 B1.P.1
## 0610009B22Rik 98 41 92 87 35 55 39 45 30
## 0610010F05Rik 3 0 7 4 4 3 4 3 2
## 0610010K14Rik 173 180 157 138 80 100 86 108 48
## 0610012G03Rik 56 41 31 51 47 52 45 50 24
## 0610030E20Rik 36 48 42 45 27 43 43 44 36
## 0610040J01Rik 285 235 217 279 142 120 119 160 70
## B1.P.2 B1.P.3 B1.P.4 B2.C.1 B2.C.2 B2.C.3 B2.C.4 B2.N.1 B2.N.2
## 0610009B22Rik 42 31 39 83 98 64 71 32 40
## 0610010F05Rik 0 2 4 2 7 4 5 5 2
## 0610010K14Rik 40 48 50 181 130 173 157 124 108
## 0610012G03Rik 40 45 45 42 39 53 45 33 41
## 0610030E20Rik 26 28 44 37 45 47 44 30 25
## 0610040J01Rik 62 74 68 287 247 236 240 64 91
## B2.N.3 B2.N.4 B2.P.1 B2.P.2 B2.P.3 B2.P.4 B3.C.1 B3.C.2 B3.C.3
## 0610009B22Rik 37 51 45 36 43 46 76 78 87
## 0610010F05Rik 3 5 5 4 6 4 4 4 5
## 0610010K14Rik 100 115 104 87 102 102 133 168 189
## 0610012G03Rik 37 39 49 43 46 47 48 38 50
## 0610030E20Rik 29 25 19 16 17 14 37 47 47
## 0610040J01Rik 50 57 48 29 19 27 270 247 253
## B3.N.1 B3.N.2 B3.N.3 B3.P.1 B3.P.2 B3.P.3 B4.C.1 B4.C.2 B4.C.3
## 0610009B22Rik 62.00 59 55 58 58 49 72 69 97.00
## 0610010F05Rik 0.81 4 4 3 3 4 4 4 0.52
## 0610010K14Rik 103.00 104 86 88 74 82 158 191 149.00
## 0610012G03Rik 60.00 49 54 55 54 50 39 50 44.00
## 0610030E20Rik 27.00 49 45 31 40 33 47 51 30.00
## 0610040J01Rik 93.00 120 87 64 68 55 260 251 259.00
## B4.C.4 B4.N.1 B4.N.2 B4.N.3 B4.P.1 B4.P.2 B4.P.3 B5.C.1 B5.C.2
## 0610009B22Rik 85 89 105 117 65 68 34.00 68 55
## 0610010F05Rik 6 2 2 2 2 2 0.52 4 6
## 0610010K14Rik 159 166 141 172 101 120 104.00 171 127
## 0610012G03Rik 48 33 51 59 36 54 66.00 39 39
## 0610030E20Rik 43 45 32 51 35 27 29.00 29 38
## 0610040J01Rik 260 207 196 205 102 97 83.00 225 212
## B5.C.3 B5.C.4 B5.N.1 B5.N.2 B5.N.3 B5.N.4 B5.P.1 B5.P.2 B5.P.3
## 0610009B22Rik 76 66 63 68 51 81 45 48 40
## 0610010F05Rik 3 2 4 3 3 2 3 3 2
## 0610010K14Rik 125 119 117 170 121 98 85 110 62
## 0610012G03Rik 34 38 38 28 38 45 53 45 44
## 0610030E20Rik 43 35 33 49 39 37 23 29 22
## 0610040J01Rik 234 183 239 189 176 213 103 81 73
## B5.P.4
## 0610009B22Rik 31
## 0610010F05Rik 3
## 0610010K14Rik 72
## 0610012G03Rik 26
## 0610030E20Rik 37
## 0610040J01Rik 160
save pdf
library(ellipse)
## Warning: package 'ellipse' was built under R version 4.0.5
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
pp.PCA.m <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848"),Size=4.5, Alpha=0.75, Font=2, ellipse=TRUE, label=TRUE, Condition=NULL){
rv <- rowVars(mat)
selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
pca_d <- as.data.frame(pca2$x)
pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][1]})))
pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][3]})))
pca_d$condition2 = pca_d$condition
if(!is.null(Condition)){
pca_d$condition2 = Condition
}
centroids <- aggregate(cbind(PC1,PC2)~condition2, pca_d, mean)
rownames(centroids) <- centroids$condition2
conf.rgn <- do.call(rbind, lapply(unique(pca_d$condition2),function(t)
data.frame(condition=as.character(t),
ellipse::ellipse(cov(pca_d[pca_d$condition2==t,1:2]),
centre=as.matrix(centroids[t,2:3]),
level=0.95),
stringsAsFactors = FALSE)))
xxx <- ggplot(data=pca_d, aes(x=PC1, y=PC2, color=condition)) +
geom_point(size=Size,alpha=Alpha) +
#stat_ellipse(level = 0.85,show.legend = F) +
#ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F) +
labs(title = Name) +
scale_colour_manual(values = Color) + guides(color=guide_legend(reverse = F)) +
theme_classic()
if(label==TRUE){
xxx <- xxx+ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F, max.overlaps = 50)
}
if(ellipse==TRUE){
xxx <- xxx+geom_path(data=conf.rgn, show.legend = F)
}
xxx
}
cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=T, label=F)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=T, label=F)[3:5],
ncol = 3),
ncol = 1)
cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=F, label=T)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=F, label=T)[3:5],
ncol = 3),
ncol = 1)
cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=T, label=F)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=T, label=F)[3:5],
ncol = 3),
ncol = 1)
cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=F, label=T)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=F, label=T)[3:5],
ncol = 3),
ncol = 1)
pp.PCA.m(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)", ellipse = F, label = T)
pp.PCA.m(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)", ellipse = F, label = T)
library(ellipse)
pp.PCAn <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848"),Size=4.5, Alpha=0.75, Font=2,
ellipse=TRUE, label=TRUE, Condition=NULL){
rv <- rowVars(mat)
selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
pca_d <- as.data.frame(pca2$x)
pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][1]})))
pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][3]})))
pca_d$condition2 = pca_d$condition
if(!is.null(Condition)){
pca_d$condition2 = Condition
}
centroids <- aggregate(cbind(PC1,PC2)~condition2, pca_d, mean)
rownames(centroids) <- centroids$condition2
conf.rgn <- do.call(rbind, lapply(unique(pca_d$condition2),function(t)
data.frame(condition=as.character(t),
ellipse::ellipse(cov(pca_d[pca_d$condition2==t,1:2]),
centre=as.matrix(centroids[t,2:3]),
level=0.95),
stringsAsFactors = FALSE)))
xxx <- ggplot(data=pca_d, aes(x=PC1, y=PC2, color=condition)) +
geom_point(aes(shape=batch),size=Size,alpha=Alpha, show.legend = T) +
#stat_ellipse(level = 0.85,show.legend = F) +
#ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F) +
labs(title = Name, x="PC1", y="PC2") +
scale_colour_manual(values = c(rep(Color,4),Color[1])) + guides(color=guide_legend(reverse = F)) +
guides(color=FALSE) +
scale_shape_manual(values = c(0,2,5,1,6)) +
theme_classic()
if(label==TRUE){
xxx <- xxx+ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F, max.overlaps = 50)
}
if(ellipse==TRUE){
xxx <- xxx+geom_path(data=conf.rgn, show.legend = F)
}
xxx
}
pp.PCAn(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2)
pp.PCAn(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2)
library(ellipse)
pp.PCAn.test <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848"),Size=4.5, Alpha=0.75, Font=2,
ellipse=TRUE, label=TRUE, Condition=NULL,
PCx="PC3",PCy="PC4"){
rv <- rowVars(mat)
selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
pca_d <- as.data.frame(pca2$x)
pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][1]})))
pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][3]})))
pca_d$condition2 = pca_d$condition
if(!is.null(Condition)){
pca_d$condition2 = Condition
}
centroids <- aggregate(cbind(get(PCx),get(PCy))~condition2, pca_d, mean)
rownames(centroids) <- centroids$condition2
conf.rgn <- do.call(rbind, lapply(unique(pca_d$condition2),function(t)
data.frame(condition=as.character(t),
ellipse::ellipse(cov(pca_d[pca_d$condition2==t,3:4]),
centre=as.matrix(centroids[t,2:3]),
level=0.95),
stringsAsFactors = FALSE)))
xxx <- ggplot(data=pca_d, aes(x=get(PCx), y=get(PCy), color=condition)) +
geom_point(aes(shape=batch),size=Size,alpha=Alpha, show.legend = T) +
#stat_ellipse(level = 0.85,show.legend = F) +
#ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F) +
labs(title = Name, x=PCx, y=PCy) +
scale_colour_manual(values = c(rep(Color,4),Color[1])) + guides(color=guide_legend(reverse = F)) +
guides(color=FALSE) +
scale_shape_manual(values = c(0,2,5,1,6)) +
theme_classic()
if(label==TRUE){
xxx <- xxx+ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F, max.overlaps = 50)
}
if(ellipse==TRUE){
xxx <- xxx+geom_path(data=conf.rgn, show.legend = F)
}
xxx
}
pp.PCAn.test(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2,
PCx = "PC3", PCy = "PC4")
pp.PCAn.test(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2,
PCx = "PC3", PCy = "PC4")
cnt.df
## batch condition1 condition2
## B1.C.1 B1 B1.C Ctrl
## B1.C.2 B1 B1.C Ctrl
## B1.C.3 B1 B1.C Ctrl
## B1.C.4 B1 B1.C Ctrl
## B1.N.1 B1 B1.N B1.N
## B1.N.2 B1 B1.N B1.N
## B1.N.3 B1 B1.N B1.N
## B1.N.4 B1 B1.N B1.N
## B1.P.1 B1 B1.P B1.P
## B1.P.2 B1 B1.P B1.P
## B1.P.3 B1 B1.P B1.P
## B1.P.4 B1 B1.P B1.P
## B2.C.1 B2 B2.C Ctrl
## B2.C.2 B2 B2.C Ctrl
## B2.C.3 B2 B2.C Ctrl
## B2.C.4 B2 B2.C Ctrl
## B2.N.1 B2 B2.N B2.N
## B2.N.2 B2 B2.N B2.N
## B2.N.3 B2 B2.N B2.N
## B2.N.4 B2 B2.N B2.N
## B2.P.1 B2 B2.P B2.P
## B2.P.2 B2 B2.P B2.P
## B2.P.3 B2 B2.P B2.P
## B2.P.4 B2 B2.P B2.P
## B3.C.1 B3 B3.C Ctrl
## B3.C.2 B3 B3.C Ctrl
## B3.C.3 B3 B3.C Ctrl
## B3.N.1 B3 B3.N B3.N
## B3.N.2 B3 B3.N B3.N
## B3.N.3 B3 B3.N B3.N
## B3.P.1 B3 B3.P B3.P
## B3.P.2 B3 B3.P B3.P
## B3.P.3 B3 B3.P B3.P
## B4.C.1 B4 B4.C Ctrl
## B4.C.2 B4 B4.C Ctrl
## B4.C.3 B4 B4.C Ctrl
## B4.C.4 B4 B4.C Ctrl
## B4.N.1 B4 B4.N B4.N
## B4.N.2 B4 B4.N B4.N
## B4.N.3 B4 B4.N B4.N
## B4.P.1 B4 B4.P B4.P
## B4.P.2 B4 B4.P B4.P
## B4.P.3 B4 B4.P B4.P
## B5.C.1 B5 B5.C Ctrl
## B5.C.2 B5 B5.C Ctrl
## B5.C.3 B5 B5.C Ctrl
## B5.C.4 B5 B5.C Ctrl
## B5.N.1 B5 B5.N B5.N
## B5.N.2 B5 B5.N B5.N
## B5.N.3 B5 B5.N B5.N
## B5.N.4 B5 B5.N B5.N
## B5.P.1 B5 B5.P B5.P
## B5.P.2 B5 B5.P B5.P
## B5.P.3 B5 B5.P B5.P
## B5.P.4 B5 B5.P B5.P
#
selt.top <- order(rowVars(matt_pc.combine.crt), decreasing = TRUE)[seq_len(8000)]
mat.test <- matt_pc.combine.crt[selt.top,]
#
idx.filt <- list()
for(NN in unique(cnt.df$condition1)){
idx.filt[[NN]] <- rowMeans(matt_pc.combine[rownames(mat.test),grep(NN,colnames(mat.test))]) > 15 &
rowSums(matt_pc.combine[rownames(mat.test),grep(NN,colnames(mat.test))]>15) >=3
}
#
idx.filt.0 <- Reduce(function(x,y){x|y},idx.filt)
mat.test <- mat.test[idx.filt.0,]
mat.test <- log2(mat.test +1)
dim(matt_pc.combine.crt)
## [1] 21649 55
dim(mat.test)
## [1] 7136 55
head(mat.test)
## B1.C.1 B1.C.2 B1.C.3 B1.C.4 B1.N.1 B1.N.2 B1.N.3
## Iigp1 0.000000 1.000000 1.584963 1.000000 1.0000000 1.000000 1.584963
## Mgl2 2.321928 1.584963 2.321928 1.584963 0.8797058 1.000000 1.000000
## Apoe 10.984418 11.380461 11.222191 11.217958 14.8830728 14.914245 14.683653
## Fam177a 0.000000 0.000000 2.321928 1.584963 2.0000000 1.584963 2.000000
## Cst3 16.189245 16.048487 16.098546 16.138392 16.0449277 16.156992 16.204839
## Ms4a7 1.000000 1.000000 1.584963 1.584963 1.0000000 1.000000 1.000000
## B1.N.4 B1.P.1 B1.P.2 B1.P.3 B1.P.4 B2.C.1 B2.C.2
## Iigp1 1.000000 1.00000 1.0000000 1.584963 1.000000 0.000000 1.584963
## Mgl2 1.584963 0.00000 0.3561438 0.704872 1.584963 2.807355 1.584963
## Apoe 14.526744 16.39126 16.1803171 16.033767 16.186037 11.594325 11.271463
## Fam177a 0.000000 2.00000 1.0000000 2.000000 1.584963 0.000000 0.000000
## Cst3 16.177400 15.51348 15.5744457 15.708922 15.642390 16.135709 16.080547
## Ms4a7 1.000000 1.00000 1.0000000 1.000000 1.000000 1.584963 1.000000
## B2.C.3 B2.C.4 B2.N.1 B2.N.2 B2.N.3 B2.N.4 B2.P.1
## Iigp1 0.6690268 1.0000000 6.000000 5.426265 5.930737 5.554589 6.044394
## Mgl2 2.0000000 1.0000000 1.000000 2.321928 3.000000 3.000000 1.000000
## Apoe 10.7648716 11.0279060 13.898885 13.934981 13.854284 14.612062 15.068400
## Fam177a 0.0000000 2.5849625 2.321928 2.584963 3.321928 3.000000 2.321928
## Cst3 16.1507389 16.0815042 14.085555 14.799029 13.821874 14.184720 12.588480
## Ms4a7 0.1634987 0.7311832 4.954196 4.754888 5.832890 5.882643 5.832890
## B2.P.2 B2.P.3 B2.P.4 B3.C.1 B3.C.2 B3.C.3 B3.N.1
## Iigp1 5.807355 5.882643 5.857981 0.00000 0.00000 0.00000 18.18734
## Mgl2 2.807355 3.169925 3.000000 0.00000 0.00000 0.00000 16.73730
## Apoe 15.097497 14.907642 15.156202 11.05664 11.21614 11.38802 14.74041
## Fam177a 3.000000 2.807355 2.321928 0.00000 0.00000 0.00000 15.04345
## Cst3 12.600378 12.235715 12.511011 16.10206 16.12763 16.17828 15.11545
## Ms4a7 7.118941 7.076816 6.954196 0.00000 0.00000 0.00000 13.87508
## B3.N.2 B3.N.3 B3.P.1 B3.P.2 B3.P.3 B4.C.1 B4.C.2
## Iigp1 18.99766 18.66766 18.43587 18.7893108 18.74362 1.000000 1.000000
## Mgl2 16.74264 16.91777 0.00000 0.8718436 0.00000 2.321928 2.321928
## Apoe 14.00922 14.62331 15.37785 15.3597496 15.44090 10.884934 11.038919
## Fam177a 16.28247 16.17865 15.33071 16.5238529 16.42813 2.000000 1.000000
## Cst3 14.86747 14.83047 14.24414 14.1350679 14.08713 16.172799 16.194488
## Ms4a7 13.88169 14.12283 16.40632 16.4740069 16.40537 1.000000 1.000000
## B4.C.3 B4.C.4 B4.N.1 B4.N.2 B4.N.3 B4.P.1 B4.P.2
## Iigp1 1.584963 0.2016339 0.00000 1.00000 0.4646683 1.584963 1.000000
## Mgl2 2.584963 1.0000000 2.00000 1.00000 1.0000000 1.000000 1.000000
## Apoe 11.354249 11.5670054 11.87460 12.20212 11.5338164 16.007838 15.865854
## Fam177a 1.584963 0.0000000 0.00000 2.00000 1.5849625 1.000000 2.321928
## Cst3 16.181463 16.0112709 16.02447 16.01072 16.0185045 15.352906 15.283342
## Ms4a7 1.584963 1.0000000 1.00000 1.00000 2.0000000 1.584963 1.000000
## B4.P.3 B5.C.1 B5.C.2 B5.C.3 B5.C.4 B5.N.1
## Iigp1 0.8237494 0.0000000 0.0000000 0.7990873 0.5945485 2.000000
## Mgl2 1.5849625 0.3895668 2.3219281 0.4646683 0.4329594 4.087463
## Apoe 15.9930047 10.6679985 11.0042205 10.8257538 11.2888661 13.142426
## Fam177a 1.0000000 0.0000000 0.0000000 2.0000000 2.0000000 1.000000
## Cst3 15.2409031 15.8412941 15.8410483 15.9093306 15.8975855 15.687813
## Ms4a7 1.0000000 0.0000000 0.7990873 0.0000000 0.0000000 0.000000
## B5.N.2 B5.N.3 B5.N.4 B5.P.1 B5.P.2 B5.P.3 B5.P.4
## Iigp1 1.5849625 2.000000 0.9030383 3.321928 4.954196 4.807355 3.700440
## Mgl2 0.0000000 2.000000 2.0000000 3.169925 2.807355 2.807355 5.083213
## Apoe 13.4614794 12.301210 11.5588990 16.204972 16.274342 15.423871 15.243620
## Fam177a 1.0000000 1.584963 1.5849625 1.584963 1.584963 1.584963 1.000000
## Cst3 15.8270697 15.840680 15.8593164 15.053205 14.709299 14.880253 15.237359
## Ms4a7 0.4436067 0.000000 0.0000000 3.169925 4.087463 3.459432 5.044394
check.outlier <- c("Ltf","S100a8","S100a9","Ngp","Camp","Trem3","Lcn2")
matt_pc.combine[c(check.outlier,"Lag3"),]
## B1.C.1 B1.C.2 B1.C.3 B1.C.4 B1.N.1 B1.N.2 B1.N.3 B1.N.4 B1.P.1 B1.P.2
## Ltf 0.00 0.00 0 0.00 0.00 0.00 0.0 0.0 0.00 0.00
## S100a8 0.00 0.00 0 0.00 0.00 0.00 0.0 0.0 0.00 0.00
## S100a9 0.00 0.00 0 0.00 0.00 0.00 0.0 0.0 0.00 0.00
## Ngp 0.00 0.00 0 0.00 0.00 0.00 0.0 0.0 0.00 0.00
## Camp 0.00 0.00 0 0.00 0.00 0.00 0.0 0.0 0.00 0.00
## Trem3 4.84 0.73 0 0.00 2.11 0.50 0.0 0.0 0.00 0.00
## Lcn2 0.00 0.00 0 0.00 0.00 0.00 0.0 0.0 0.00 0.00
## Lag3 660.05 729.37 689 690.87 919.16 1075.36 984.6 1115.3 791.13 855.37
## B1.P.3 B1.P.4 B2.C.1 B2.C.2 B2.C.3 B2.C.4 B2.N.1 B2.N.2 B2.N.3 B2.N.4
## Ltf 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.76 0.15 0.00
## S100a8 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.32 0.00
## S100a9 0.00 0.00 0.00 1.76 0.00 0.00 0.00 0.00 1.21 0.00
## Ngp 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Camp 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Trem3 0.00 0.00 5.36 2.08 0.00 0.00 14.50 5.59 13.92 13.78
## Lcn2 0.00 0.00 1.51 0.00 0.00 0.00 8.05 0.48 3.73 0.58
## Lag3 908.82 902.91 545.32 553.36 567.03 513.83 460.77 517.29 181.96 325.38
## B2.P.1 B2.P.2 B2.P.3 B2.P.4 B3.C.1 B3.C.2 B3.C.3 B3.N.1 B3.N.2 B3.N.3
## Ltf 0.00 0.00 0.83 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## S100a8 0.00 2.08 5.40 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## S100a9 0.00 0.95 0.00 0.00 0.00 0.00 0.00 0.83 0.00 0.00
## Ngp 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Camp 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Trem3 3.52 7.10 13.15 8.49 0.51 0.00 0.00 0.00 0.79 0.68
## Lcn2 12.84 0.42 5.47 3.93 0.00 0.00 0.00 0.00 0.00 0.77
## Lag3 648.39 318.97 206.74 176.39 484.00 454.17 475.36 1083.59 989.85 1143.17
## B3.P.1 B3.P.2 B3.P.3 B4.C.1 B4.C.2 B4.C.3 B4.C.4 B4.N.1 B4.N.2 B4.N.3
## Ltf 0.00 0.00 0.00 0.0 0.00 0.00 0.0 0.00 0.00 0.00
## S100a8 0.00 1.25 1.23 0.0 0.00 0.00 0.0 0.00 0.00 0.00
## S100a9 0.00 0.00 0.00 0.0 0.00 0.00 0.0 0.00 0.00 0.00
## Ngp 0.00 0.00 0.00 0.0 0.00 0.00 0.0 0.00 0.00 0.00
## Camp 0.48 0.00 0.00 0.0 0.00 0.00 0.0 0.00 0.00 0.00
## Trem3 0.00 0.38 0.00 0.0 0.00 0.60 1.6 0.00 0.00 0.00
## Lcn2 0.37 0.85 0.86 0.0 0.72 0.00 0.0 0.00 0.00 0.00
## Lag3 1193.42 1221.94 1199.24 462.9 542.51 539.29 742.3 535.61 576.36 510.66
## B4.P.1 B4.P.2 B4.P.3 B5.C.1 B5.C.2 B5.C.3 B5.C.4 B5.N.1 B5.N.2 B5.N.3
## Ltf 0.00 0.00 0.78 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## S100a8 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3.31 0.00
## S100a9 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Ngp 0.00 0.00 0.34 0.00 0.00 0.00 0.38 0.00 0.40 0.00
## Camp 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Trem3 0.00 0.00 0.00 0.00 1.63 0.00 0.50 0.00 0.00 0.00
## Lcn2 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Lag3 696.48 774.17 702.64 465.72 445.28 449.94 491.93 806.46 744.45 616.73
## B5.N.4 B5.P.1 B5.P.2 B5.P.3 B5.P.4
## Ltf 0.00 0.00 0.00 0.00 0.00
## S100a8 0.00 0.00 0.00 0.00 0.00
## S100a9 0.00 0.00 0.00 0.00 0.00
## Ngp 0.00 0.00 0.00 0.00 0.00
## Camp 0.00 0.00 0.00 0.00 0.00
## Trem3 1.53 0.00 0.00 0.43 0.00
## Lcn2 0.00 0.00 0.00 0.55 0.00
## Lag3 586.61 1086.39 1002.87 898.92 836.32
c(check.outlier,"Lag3") %in% rownames(mat.test)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
par(mfrow=c(2,2))
plot(x=rowMeans(mat_pc.combine.crt),
y=rowVars(mat_pc.combine.crt), log="xy",
xlim=c(1e-2,3e+04),
ylim=c(1e-2,1e+11) ,
main="unfiltered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")
plot(x=rowMeans(2^(mat.test+1)),
y=rowVars(2^(mat.test+1)), log="xy",
xlim=c(1e-2,4e+04),
ylim=c(1e-2,1e+11) ,
main="filtered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")
plot(x=rowMeans(log2(mat_pc.combine.crt+1) ),
y=rowVars(log2(mat_pc.combine.crt+1) ), log="xy",
xlim=c(0.02,15), ylim=c(0.01,50),
main="unfiltered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")
plot(x=rowMeans(mat.test),
y=rowVars(mat.test), log="xy",
xlim=c(0.02,15), ylim=c(0.01,50) ,
main="filtered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")
pdf("./result.new/matrix_filtering.pdf",
width = 7.5,height = 6)
par(mfrow=c(2,2))
plot(x=rowMeans(mat_pc.combine.crt),
y=rowVars(mat_pc.combine.crt), log="xy",
xlim=c(1e-2,3e+04),
ylim=c(1e-2,1e+11) , pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
main="unfiltered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")
## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 x values <= 0 omitted from
## logarithmic plot
## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 y values <= 0 omitted from
## logarithmic plot
plot(x=rowMeans(2^(mat.test+1)),
y=rowVars(2^(mat.test+1)), log="xy",
xlim=c(1e-1,4e+04),
ylim=c(1e-2,1e+11) ,pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
main="filtered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")
plot(x=rowMeans(log2(mat_pc.combine.crt+1) ),
y=rowVars(log2(mat_pc.combine.crt+1) ), log="xy",
xlim=c(0.02,15), ylim=c(0.01,50),pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
main="unfiltered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")
## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 x values <= 0 omitted from
## logarithmic plot
## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 y values <= 0 omitted from
## logarithmic plot
plot(x=rowMeans(mat.test),
y=rowVars(mat.test), log="xy",
xlim=c(0.1,15), ylim=c(0.01,50) ,pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
main="filtered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")
dev.off()
## png
## 2
matt_pc.crt <- list(B1=matt_pc.combine.crt[,grep("B1",colnames(matt_pc.combine.crt))],
B2=matt_pc.combine.crt[,grep("B2",colnames(matt_pc.combine.crt))],
B3=matt_pc.combine.crt[,grep("B3",colnames(matt_pc.combine.crt))],
B4=matt_pc.combine.crt[,grep("B4",colnames(matt_pc.combine.crt))],
B5=matt_pc.combine.crt[,grep("B5",colnames(matt_pc.combine.crt))]
)
cowplot::plot_grid(
plotlist = lapply(matt_pc.crt,function(matt){
ggplot(reshape2::melt(data.frame(matt)), aes(x=variable, y=log2(value+1))) + geom_point() +
geom_boxplot() +
stat_summary(fun=mean, geom="point", shape=18, size=3, color="red") +
theme_classic() +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(title = "data distribution - TPM.crt", x="")
}), ncol = 2)
par(cex=0.75, mar=c(0,5,2,0))
#topN <- 2000
plot(hclust(dist(t(mat.test )), method = "average"),
#plot(hclust(dist(t(mat.test )), method = "ward.D2"),
main = paste0("Sample clustering to detect ourliers, filtered ",nrow(mat.test)," pcgenes, log2(TPM.crt+1)"),
sub="", xlab = "", cex.lab= 1.5, cex.axis = 1.5, cex.main = 2)
#abline(h = 15, col="red")
datExpr <- t(mat.test)
sampleTree <- hclust(dist(datExpr), method = "average")
#sampleTree <- hclust(dist(datExpr), method = "ward.D2")
cnt.df
## batch condition1 condition2
## B1.C.1 B1 B1.C Ctrl
## B1.C.2 B1 B1.C Ctrl
## B1.C.3 B1 B1.C Ctrl
## B1.C.4 B1 B1.C Ctrl
## B1.N.1 B1 B1.N B1.N
## B1.N.2 B1 B1.N B1.N
## B1.N.3 B1 B1.N B1.N
## B1.N.4 B1 B1.N B1.N
## B1.P.1 B1 B1.P B1.P
## B1.P.2 B1 B1.P B1.P
## B1.P.3 B1 B1.P B1.P
## B1.P.4 B1 B1.P B1.P
## B2.C.1 B2 B2.C Ctrl
## B2.C.2 B2 B2.C Ctrl
## B2.C.3 B2 B2.C Ctrl
## B2.C.4 B2 B2.C Ctrl
## B2.N.1 B2 B2.N B2.N
## B2.N.2 B2 B2.N B2.N
## B2.N.3 B2 B2.N B2.N
## B2.N.4 B2 B2.N B2.N
## B2.P.1 B2 B2.P B2.P
## B2.P.2 B2 B2.P B2.P
## B2.P.3 B2 B2.P B2.P
## B2.P.4 B2 B2.P B2.P
## B3.C.1 B3 B3.C Ctrl
## B3.C.2 B3 B3.C Ctrl
## B3.C.3 B3 B3.C Ctrl
## B3.N.1 B3 B3.N B3.N
## B3.N.2 B3 B3.N B3.N
## B3.N.3 B3 B3.N B3.N
## B3.P.1 B3 B3.P B3.P
## B3.P.2 B3 B3.P B3.P
## B3.P.3 B3 B3.P B3.P
## B4.C.1 B4 B4.C Ctrl
## B4.C.2 B4 B4.C Ctrl
## B4.C.3 B4 B4.C Ctrl
## B4.C.4 B4 B4.C Ctrl
## B4.N.1 B4 B4.N B4.N
## B4.N.2 B4 B4.N B4.N
## B4.N.3 B4 B4.N B4.N
## B4.P.1 B4 B4.P B4.P
## B4.P.2 B4 B4.P B4.P
## B4.P.3 B4 B4.P B4.P
## B5.C.1 B5 B5.C Ctrl
## B5.C.2 B5 B5.C Ctrl
## B5.C.3 B5 B5.C Ctrl
## B5.C.4 B5 B5.C Ctrl
## B5.N.1 B5 B5.N B5.N
## B5.N.2 B5 B5.N B5.N
## B5.N.3 B5 B5.N B5.N
## B5.N.4 B5 B5.N B5.N
## B5.P.1 B5 B5.P B5.P
## B5.P.2 B5 B5.P B5.P
## B5.P.3 B5 B5.P B5.P
## B5.P.4 B5 B5.P B5.P
datTraits <- data.frame(row.names = rownames(cnt.df),
#Ctrl = grepl("Ctrl",cnt.df$condition2),
B1.C = grepl("B1.C",cnt.df$condition1),
B1.N = grepl("B1.N",cnt.df$condition1),
B1.P = grepl("B1.P",cnt.df$condition1),
B2.C = grepl("B2.C",cnt.df$condition1),
B2.N = grepl("B2.N",cnt.df$condition1),
B2.P = grepl("B2.P",cnt.df$condition1),
B3.C = grepl("B3.C",cnt.df$condition1),
B3.N = grepl("B3.N",cnt.df$condition1),
B3.P = grepl("B3.P",cnt.df$condition1),
B4.C = grepl("B4.C",cnt.df$condition1),
B4.N = grepl("B4.N",cnt.df$condition1),
B4.P = grepl("B4.P",cnt.df$condition1),
B5.C = grepl("B5.C",cnt.df$condition1),
B5.N = grepl("B5.N",cnt.df$condition1),
B5.P = grepl("B5.P",cnt.df$condition1))
datTraits[datTraits==TRUE] <- 1
datTraits[datTraits==FALSE] <- 0
datTraits
## B1.C B1.N B1.P B2.C B2.N B2.P B3.C B3.N B3.P B4.C B4.N B4.P B5.C B5.N
## B1.C.1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## B1.C.2 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## B1.C.3 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## B1.C.4 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## B1.N.1 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## B1.N.2 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## B1.N.3 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## B1.N.4 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## B1.P.1 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## B1.P.2 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## B1.P.3 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## B1.P.4 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## B2.C.1 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## B2.C.2 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## B2.C.3 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## B2.C.4 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## B2.N.1 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## B2.N.2 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## B2.N.3 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## B2.N.4 0 0 0 0 1 0 0 0 0 0 0 0 0 0
## B2.P.1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## B2.P.2 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## B2.P.3 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## B2.P.4 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## B3.C.1 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## B3.C.2 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## B3.C.3 0 0 0 0 0 0 1 0 0 0 0 0 0 0
## B3.N.1 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## B3.N.2 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## B3.N.3 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## B3.P.1 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## B3.P.2 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## B3.P.3 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## B4.C.1 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## B4.C.2 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## B4.C.3 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## B4.C.4 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## B4.N.1 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## B4.N.2 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## B4.N.3 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## B4.P.1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## B4.P.2 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## B4.P.3 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## B5.C.1 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## B5.C.2 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## B5.C.3 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## B5.C.4 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## B5.N.1 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## B5.N.2 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## B5.N.3 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## B5.N.4 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## B5.P.1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## B5.P.2 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## B5.P.3 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## B5.P.4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## B5.P
## B1.C.1 0
## B1.C.2 0
## B1.C.3 0
## B1.C.4 0
## B1.N.1 0
## B1.N.2 0
## B1.N.3 0
## B1.N.4 0
## B1.P.1 0
## B1.P.2 0
## B1.P.3 0
## B1.P.4 0
## B2.C.1 0
## B2.C.2 0
## B2.C.3 0
## B2.C.4 0
## B2.N.1 0
## B2.N.2 0
## B2.N.3 0
## B2.N.4 0
## B2.P.1 0
## B2.P.2 0
## B2.P.3 0
## B2.P.4 0
## B3.C.1 0
## B3.C.2 0
## B3.C.3 0
## B3.N.1 0
## B3.N.2 0
## B3.N.3 0
## B3.P.1 0
## B3.P.2 0
## B3.P.3 0
## B4.C.1 0
## B4.C.2 0
## B4.C.3 0
## B4.C.4 0
## B4.N.1 0
## B4.N.2 0
## B4.N.3 0
## B4.P.1 0
## B4.P.2 0
## B4.P.3 0
## B5.C.1 0
## B5.C.2 0
## B5.C.3 0
## B5.C.4 0
## B5.N.1 0
## B5.N.2 0
## B5.N.3 0
## B5.N.4 0
## B5.P.1 1
## B5.P.2 1
## B5.P.3 1
## B5.P.4 1
pheatmap::pheatmap(datTraits, cluster_rows = F, cluster_cols = F, main = "Trait matrix",
display_numbers = T, number_format = "%.0f", legend = F)
traitColors <- numbers2colors(datTraits, signed = F)
plotDendroAndColors(sampleTree, traitColors,
groupLabels = names(datTraits),
main = paste0("Sample dendrogram of filtered top",nrow(mat.test)," highly variable genes, using log2(TPM.crt+1) as input"))
powers <- c(c(1:50))
powers
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
sft <- pickSoftThreshold(data = datExpr,
powerVector = powers, verbose = 5,
networkType = "signed")
## pickSoftThreshold: will use block size 6269.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 6269 of 7136
## ..working on genes 6270 through 7136 of 7136
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.9130 4.140 0.889 4090.00 4270.000 4830.0
## 2 2 0.5970 1.170 0.659 2600.00 2720.000 3630.0
## 3 3 0.0529 0.223 0.485 1770.00 1800.000 2890.0
## 4 4 0.0429 -0.183 0.584 1250.00 1230.000 2370.0
## 5 5 0.2430 -0.446 0.713 923.00 854.000 1990.0
## 6 6 0.4550 -0.662 0.804 697.00 604.000 1700.0
## 7 7 0.5890 -0.844 0.845 538.00 436.000 1470.0
## 8 8 0.6720 -0.964 0.881 422.00 326.000 1290.0
## 9 9 0.7230 -1.050 0.904 337.00 251.000 1130.0
## 10 10 0.7620 -1.130 0.918 272.00 198.000 1000.0
## 11 11 0.7940 -1.190 0.936 222.00 156.000 896.0
## 12 12 0.8240 -1.240 0.950 184.00 123.000 803.0
## 13 13 0.8100 -1.310 0.932 153.00 97.700 724.0
## 14 14 0.8090 -1.360 0.928 128.00 78.000 655.0
## 15 15 0.8160 -1.390 0.932 109.00 62.500 595.0
## 16 16 0.8300 -1.410 0.940 92.60 50.200 542.0
## 17 17 0.8440 -1.420 0.951 79.40 40.900 495.0
## 18 18 0.8580 -1.420 0.958 68.40 33.200 454.0
## 19 19 0.8600 -1.440 0.957 59.30 27.100 417.0
## 20 20 0.8710 -1.450 0.964 51.60 22.200 384.0
## 21 21 0.8860 -1.450 0.973 45.20 18.200 355.0
## 22 22 0.8890 -1.450 0.974 39.70 14.900 328.0
## 23 23 0.8970 -1.460 0.978 35.00 12.400 304.0
## 24 24 0.9030 -1.450 0.982 31.00 10.300 282.0
## 25 25 0.9090 -1.450 0.985 27.50 8.580 263.0
## 26 26 0.9150 -1.460 0.988 24.50 7.140 245.0
## 27 27 0.9150 -1.460 0.987 21.90 6.030 228.0
## 28 28 0.9190 -1.460 0.990 19.60 5.050 213.0
## 29 29 0.9210 -1.460 0.989 17.70 4.240 199.0
## 30 30 0.9250 -1.460 0.991 15.90 3.600 187.0
## 31 31 0.9290 -1.460 0.992 14.40 3.050 175.0
## 32 32 0.9320 -1.460 0.993 13.00 2.570 165.0
## 33 33 0.9380 -1.450 0.996 11.80 2.180 155.0
## 34 34 0.9410 -1.450 0.996 10.80 1.870 146.0
## 35 35 0.9440 -1.440 0.997 9.82 1.600 137.0
## 36 36 0.9410 -1.450 0.994 8.97 1.370 130.0
## 37 37 0.9420 -1.440 0.996 8.21 1.160 123.0
## 38 38 0.9440 -1.440 0.997 7.53 0.988 117.0
## 39 39 0.9430 -1.440 0.995 6.91 0.841 111.0
## 40 40 0.9410 -1.450 0.995 6.36 0.719 106.0
## 41 41 0.9370 -1.450 0.992 5.86 0.617 101.0
## 42 42 0.9400 -1.450 0.994 5.41 0.528 96.2
## 43 43 0.9390 -1.460 0.993 5.00 0.452 91.8
## 44 44 0.9410 -1.460 0.994 4.63 0.393 87.6
## 45 45 0.9330 -1.470 0.988 4.29 0.340 83.7
## 46 46 0.9290 -1.470 0.988 3.99 0.294 80.0
## 47 47 0.9290 -1.470 0.989 3.71 0.257 76.5
## 48 48 0.9320 -1.470 0.991 3.45 0.222 73.2
## 49 49 0.9350 -1.470 0.992 3.21 0.194 70.1
## 50 50 0.9270 -1.480 0.986 3.00 0.167 67.2
par(mfrow = c(2,1))
cex1 = 0.9
#
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, Signed R^2", type = "n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels = powers, cex = cex1, col = "red")
abline(h=0.9, col="red")
#
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels = powers,)
softPower <- 30
adjacency <- adjacency(datExpr = datExpr,
power = softPower,
type = "signed")
TOM <- TOMsimilarity(adjMat = adjacency, TOMType = "signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
dissTOM <- 1 - TOM
geneTree <- hclust(as.dist(dissTOM), method = "average")
#geneTree <- hclust(as.dist(dissTOM), method = "ward.D2")
#
plot(geneTree, xlab = "", sub = "", main = "Gene clustering on TOM-based dissimilarity",
labels = FALSE, hang = 0.04)
# the authors like large modules, so set minimum module size relatively high
minModuleSize = 30
#
dynamicMods <- cutreeDynamic(dendro = geneTree,
distM = dissTOM,
method = "hybrid",
deepSplit = 2,
pamRespectsDendro = FALSE,
minClusterSize = minModuleSize)
## ..cutHeight not given, setting it to 0.998 ===> 99% of the (truncated) height range in dendro.
## ..done.
table(dynamicMods)
## dynamicMods
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 996 1929 1109 433 375 304 298 277 252 227 179 177 139 121 108 98
## 16 17
## 69 45
#
dynamicColors <- labels2colors(dynamicMods)
table(dynamicColors)[match(table(dynamicMods),table(dynamicColors))]
## dynamicColors
## grey turquoise blue brown yellow green
## 996 1929 1109 433 375 304
## red black pink magenta purple greenyellow
## 298 277 252 227 179 177
## tan salmon cyan midnightblue lightcyan grey60
## 139 121 108 98 69 45
#
plotDendroAndColors(geneTree,dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# Calculate eigengenes
MEList <- moduleEigengenes(datExpr, colors = dynamicColors)
MEs <- MEList$eigengenes
# dissimilarity of module eigengenes
#MEDiss <- 1 - cor(MEs)
MEDiss <- 1 - cor(MEs)
# cluster module eigengenes
#METree <- hclust(as.dist(MEDiss),method = "average")
METree <- hclust(as.dist(MEDiss),method = "ward.D2")
#
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
abline(h=0.15, col="red")
abline(h=0.17, col="red")
abline(h=0.2, col="red")
choose a height cut ,corresponding to correlation, to merge
MEDissThres <- 0.17
#
merge <- mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
## mergeCloseModules: Merging modules whose distance is less than 0.17
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 18 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 10 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 9 module eigengenes in given set.
## Calculating new MEs...
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 9 module eigengenes in given set.
mergedColors <- merge$colors
mergedMEs <- merge$newMEs
#
plotDendroAndColors(geneTree,
cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# rename
moduleColors <- mergedColors
#
colorOrder <- c("grey", standardColors(50))
moduleLabels <- match(moduleColors, colorOrder)-1
MEs <- mergedMEs
standardColors(50)
## [1] "turquoise" "blue" "brown" "yellow"
## [5] "green" "red" "black" "pink"
## [9] "magenta" "purple" "greenyellow" "tan"
## [13] "salmon" "cyan" "midnightblue" "lightcyan"
## [17] "grey60" "lightgreen" "lightyellow" "royalblue"
## [21] "darkred" "darkgreen" "darkturquoise" "darkgrey"
## [25] "orange" "darkorange" "white" "skyblue"
## [29] "saddlebrown" "steelblue" "paleturquoise" "violet"
## [33] "darkolivegreen" "darkmagenta" "sienna3" "yellowgreen"
## [37] "skyblue3" "plum1" "orangered4" "mediumpurple3"
## [41] "lightsteelblue1" "lightcyan1" "ivory" "floralwhite"
## [45] "darkorange2" "brown4" "bisque4" "darkslateblue"
## [49] "plum2" "thistle2"
scales::show_col(standardColors(50))
table(dynamicMods)
## dynamicMods
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 996 1929 1109 433 375 304 298 277 252 227 179 177 139 121 108 98
## 16 17
## 69 45
length(table(dynamicMods))
## [1] 18
table(moduleLabels)
## moduleLabels
## 0 2 4 5 8 12 14 16 17
## 996 1286 375 700 3070 487 108 69 45
length(table(moduleLabels))
## [1] 9
table(moduleColors)
## moduleColors
## blue cyan green grey grey60 lightcyan pink tan
## 1286 108 700 996 45 69 3070 487
## yellow
## 375
table(moduleColors)[c(1,7,2,6,8,5,3,9,4)]
## moduleColors
## blue pink cyan lightcyan tan grey60 green yellow
## 1286 3070 108 69 487 45 700 375
## grey
## 996
data.frame(table(moduleColors)[c(1,7,2,6,8,5,3,9,4)])
## moduleColors Freq
## 1 blue 1286
## 2 pink 3070
## 3 cyan 108
## 4 lightcyan 69
## 5 tan 487
## 6 grey60 45
## 7 green 700
## 8 yellow 375
## 9 grey 996
moduleColors.new <- moduleColors
moduleColors.new[moduleColors.new == "blue"] <- "purple"
moduleColors.new[moduleColors.new == "pink"] <- "blue"
moduleColors.new[moduleColors.new == "cyan"] <- "darkorange"
moduleColors.new[moduleColors.new == "green"] <- "darkgreen"
moduleColors.new[moduleColors.new == "yellow"] <- "green"
moduleColors.new[moduleColors.new == "lightcyan"] <- "yellow"
moduleColors.new[moduleColors.new == "tan"] <- "turquoise"
moduleColors.new[moduleColors.new == "grey60"] <- "yellowgreen"
moduleColors.new[moduleColors.new == "grey"] <- "grey"
moduleColors.new <- factor(moduleColors.new,
levels = c("blue","purple",
"darkorange","yellow",
"yellowgreen",
"green","darkgreen","turquoise",
"grey"))
data.frame(table(moduleColors.new))
## moduleColors.new Freq
## 1 blue 3070
## 2 purple 1286
## 3 darkorange 108
## 4 yellow 69
## 5 yellowgreen 45
## 6 green 375
## 7 darkgreen 700
## 8 turquoise 487
## 9 grey 996
#
nSamples <- nrow(datTraits)
# Recalculate MEs with color labels
#MEs0 <- moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs0 <- moduleEigengenes(datExpr, moduleColors.new)$eigengenes
#MEs <- orderMEs(MEs0)
MEs <- MEs0
moduleTraitCor <- cor(MEs, datTraits, use = "p")
#moduleTraitCor <- bicor(MEs, datTraits, use = "p", robustY = F)
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
#
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
par(mar = c(6, 10.5, 3, 3))
module_order <- names(MEs) %>% gsub("ME","",.)
#
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = module_order,
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = 0.5,
cex.lab = 0.8,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
par(mar = c(6, 10.5, 3, 3))
module_order <- names(MEs) %>% gsub("ME","",.)
#
idx.xlable <- c(4,10,1,7,11,13,
14,
2,3,12,15,8,9,5,6
)
color.ttt <- colorRampPalette(
c(
"#03047F", # deep blue
"white",
"#CC2627" # deep red
)
)(100)
labeledHeatmap(Matrix = moduleTraitCor[,idx.xlable],
xLabels = names(datTraits)[idx.xlable],
yLabels = names(MEs),
ySymbols = module_order,
colorLabels = FALSE,
colors = blueWhiteRed(50),
#colors = color.ttt,
textMatrix = textMatrix[,idx.xlable],
setStdMargins = FALSE,
cex.text = 0.5,
cex.lab = 0.8,
zlim = c(-0.95,0.95),
main = paste("Module-trait relationships"))
par(mar = c(6, 10.5, 3, 3))
module_order <- names(MEs) %>% gsub("ME","",.)
#
idx.xlable1 <- c(1,2,3,
10,11,12,13,14,15,
7,8,9,
4,5,6
)
color.ttt <- colorRampPalette(
c(
"#03047F", # deep blue
"white",
"#CC2627" # deep red
)
)(100)
labeledHeatmap(Matrix = moduleTraitCor[,idx.xlable1],
xLabels = names(datTraits)[idx.xlable1],
yLabels = names(MEs),
ySymbols = module_order,
colorLabels = FALSE,
colors = blueWhiteRed(50),
#colors = color.ttt,
textMatrix = textMatrix[,idx.xlable1],
setStdMargins = FALSE,
cex.text = 0.5,
cex.lab = 0.8,
zlim = c(-0.95,0.95),
main = paste("Module-trait relationships"))
for GO enrichment
rownames(mat.test)[moduleColors.new=="darkorange"]
## [1] "Ctsd" "C2" "Ctss" "Tyrobp" "C1qb" "C1qa"
## [7] "Ctsz" "Itm2b" "Trem2" "Cd9" "Ctsl" "Serpine2"
## [13] "Grn" "Cd63" "Cd68" "Hexa" "Ctsa" "Syngr1"
## [19] "Ly86" "Lag3" "Pld3" "Creg1" "Gusb" "Ssr4"
## [25] "Lrpap1" "Cd34" "Lamp2" "Glmp" "Bsg" "Cadm1"
## [31] "Tpp1" "Ergic3" "Slc11a1" "Cd84" "Vkorc1" "Erp29"
## [37] "Mt1" "Mpc1" "Ifnar2" "Tmed3" "Sdf4" "Grcc10"
## [43] "Naglu" "Sdf2l1" "Rnasek" "Dpp7" "Atp6ap1" "Glb1"
## [49] "Fkbp2" "Aplp2" "Nrp1" "Ramp1" "Nucb1" "Gaa"
## [55] "Pebp1" "Fuca1" "Lipa" "Atp6v1c1" "Psat1" "Renbp"
## [61] "Dusp3" "Napa" "Colgalt1" "Dnase2a" "Ank" "Klhl6"
## [67] "Fam20c" "Gde1" "Pdcd1" "Gba" "Os9" "Siglecf"
## [73] "Atxn10" "Apoc4" "Speg" "Fgf13" "C1galt1c1" "Nceh1"
## [79] "Echs1" "Ephx1" "Slc25a4" "Apbb2" "Cyb5r1" "Sil1"
## [85] "Crtap" "Ddx31" "Ogfod3" "St14" "Ggh" "Ttyh2"
## [91] "Plekhm2" "Scarb2" "Rgs3" "Tmem191c" "Cacna1a" "Chst2"
## [97] "Olfr110" "Fam3c" "Apmap" "Retsat" "Olfr111" "Arhgap24"
## [103] "Tcim" "Ptchd1" "Sord" "Mrgpre" "Enpp1" "Dnajb14"
par(cex = 0.9)
plotEigengeneNetworks(MEs, "",
marDendro = c(0,4.5,1,3),
marHeatmap = c(5,4.5,1,3),
cex.lab = 0.8,
xLabelsAngle = 90)
DEG.df <- list(B1.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B1.P_vs_C.csv"),
B2.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B2.P_vs_C.csv"),
B3.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B3.P_vs_C.csv"),
B4.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B4.P_vs_C.csv"),
B5.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B5.P_vs_C.csv"))
DEG.df <- lapply(DEG.df, function(x){
rownames(x) <- x$gene
x
})
lapply(DEG.df, head)
## $B1.PvsC
## gene log2FoldChange logCPM LR pvalue padj
## Cst7 Cst7 8.495187 11.329086 2311.8046 0.000000e+00 0.000000e+00
## Apoe Apoe 5.700928 15.207754 1847.1664 0.000000e+00 0.000000e+00
## Lpl Lpl 6.496993 9.587738 829.5032 2.078124e-182 6.868199e-179
## Igf1 Igf1 8.259064 7.996613 814.5473 3.709031e-179 9.193760e-176
## Chst2 Chst2 5.343060 7.790534 645.9321 1.713193e-142 3.397261e-139
## Ch25h Ch25h 8.205729 7.737993 637.0509 1.463288e-140 2.418084e-137
## FC
## Cst7 360.83293
## Apoe 52.01760
## Lpl 90.32119
## Igf1 306.35565
## Chst2 40.59021
## Ch25h 295.23683
##
## $B2.PvsC
## gene log2FoldChange logCPM LR pvalue padj
## Iigp1 Iigp1 7.947249 8.247172 942.7679 4.952481e-207 5.463578e-203
## Cxcl10 Cxcl10 6.929976 7.883841 877.2153 8.810742e-193 4.860005e-189
## Cfb Cfb 10.448006 9.826880 864.0345 6.463601e-190 2.376881e-186
## Gbp2 Gbp2 6.933162 8.868324 839.2066 1.614679e-184 4.453283e-181
## Cd72 Cd72 5.662713 8.773877 806.2563 2.354160e-177 5.194218e-174
## Tgtp2 Tgtp2 6.331374 8.223505 805.0476 4.311557e-177 7.927516e-174
## FC
## Iigp1 246.80858
## Cxcl10 121.93564
## Cfb 1396.89283
## Gbp2 122.20517
## Cd72 50.65782
## Tgtp2 80.52548
##
## $B3.PvsC
## gene log2FoldChange logCPM LR pvalue padj FC
## Cfb Cfb 9.354500 7.870005 1549.713 0 0 654.61377
## C4b C4b 7.525886 9.643096 1842.803 0 0 184.29670
## H2-Aa H2-Aa 7.515011 9.346702 1945.710 0 0 182.91262
## H2-Q7 H2-Q7 7.234406 9.122856 2353.472 0 0 150.58209
## Cst7 Cst7 6.586956 9.015511 1820.812 0 0 96.13272
## H2-Q6 H2-Q6 6.291742 8.339651 1727.806 0 0 78.34350
##
## $B4.PvsC
## gene log2FoldChange logCPM LR pvalue padj
## Spp1 Spp1 7.561344 8.255223 1247.4543 2.967059e-273 2.893476e-269
## Axl Axl 5.138007 8.389047 1077.0282 3.248776e-236 1.584103e-232
## Itgax Itgax 4.908976 7.567481 812.4350 1.067804e-178 3.471074e-175
## Cst7 Cst7 5.148687 7.641870 718.1918 3.310198e-158 8.070262e-155
## Apoe Apoe 5.080873 14.998443 704.7216 2.811618e-155 5.483780e-152
## Mmp12 Mmp12 7.267412 7.091789 675.0327 8.037983e-149 1.306440e-145
## FC
## Spp1 188.88233
## Axl 35.21229
## Itgax 30.04340
## Cst7 35.47393
## Apoe 33.84504
## Mmp12 154.06675
##
## $B5.PvsC
## gene log2FoldChange logCPM LR pvalue padj
## Axl Axl 4.864722 7.834561 619.7363 8.533379e-137 8.786821e-133
## Apoe Apoe 5.189902 13.327538 548.4807 2.694577e-121 1.387303e-117
## Lgals3bp Lgals3bp 3.540251 10.685313 529.3506 3.910460e-117 1.342200e-113
## Itgax Itgax 3.313357 6.799122 480.0826 2.050056e-106 5.277358e-103
## H2-D1 H2-D1 3.074170 10.944743 454.6999 6.843673e-101 1.409386e-97
## Lyz2 Lyz2 3.058867 9.866918 423.7639 3.700029e-94 6.349867e-91
## FC
## Axl 29.135812
## Apoe 36.501950
## Lgals3bp 11.633803
## Itgax 9.940766
## H2-D1 8.422042
## Lyz2 8.333179
DEG.list.FC1.5 <- list(B1.Pup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
B1.Cup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
B2.Pup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
B2.Cup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
B3.Pup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
B3.Cup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
B4.Pup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
B4.Cup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
B5.Pup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
B5.Cup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene)
DEG.list.FC2 <- list(B1.Pup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
B1.Cup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
B2.Pup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
B2.Cup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
B3.Pup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
B3.Cup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
B4.Pup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
B4.Cup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
B5.Pup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
B5.Cup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene)
library(UpSetR)
## Warning: package 'UpSetR' was built under R version 4.0.5
upset(fromList(DEG.list.FC1.5),
order.by = 'freq', nsets = 10, point.size=3.5, line.size =1, text.scale = 2)
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=20))
upset(fromList(DEG.list.FC2),
order.by = 'freq', nsets = 10, point.size=3.5, line.size =1, text.scale = 2)
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=20))
upset(fromList(DEG.list.FC1.5[c(1,3,5,7,9)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.6)
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
upset(fromList(DEG.list.FC1.5[c(2,4,6,8,10)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.5)
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
#
upset(fromList(DEG.list.FC2[c(1,3,5,7,9)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.5)
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
upset(fromList(DEG.list.FC2[c(2,4,6,8,10)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.6)
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Pup.FC1.5.pdf", width = 9,height = 5.4)
upset(fromList(DEG.list.FC1.5[c(1,3,5,7,9)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png
## 2
pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Cup.FC1.5.pdf", width = 9,height = 5.4)
upset(fromList(DEG.list.FC1.5[c(2,4,6,8,10)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png
## 2
#
pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Pup.FC2.pdf", width = 9,height = 5.4)
upset(fromList(DEG.list.FC2[c(1,3,5,7,9)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png
## 2
pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Cup.FC2.pdf", width = 9,height = 5.4)
upset(fromList(DEG.list.FC2[c(2,4,6,8,10)]),
order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png
## 2
#
plotTOM <- dissTOM^7
#
diag(plotTOM) <- NA
TOMplot(plotTOM, geneTree, moduleColors.new, main = ("Network heatmap plot, all genes"), col=gplots::colorpanel(250,'darkred',"orange",'lemonchiffon'))
TOMplot(plotTOM, geneTree, moduleColors.new, main = ("Network heatmap plot, all genes"))
output heatmap and matrix for each module
font.size <- 0.11/ (table(moduleColors.new) /2000)
font.size[font.size >2] <- font.size[font.size >2] +2.5
font.size[font.size <0.5] <- 0.5
font.size[font.size >10] <- 10
gg.xxx <- list()
for(xxx in names(table(moduleColors.new))){
gg.xxx[[xxx]] <- cowplot::plot_grid(
pheatmap::pheatmap(zscore_mat(log2(matt_pc.combine.crt[rownames(mat.test)[moduleColors.new==xxx],]+1)),cluster_rows = T, cluster_cols = F,
main = paste0(xxx,", zscore of log2(TPM.crt +1)"),
color = color.test,
show_rownames = T, fontsize_row = font.size[xxx],
#gaps_row = length(rets$up),
#gaps_row = 40,
silent = T,
#gaps_col = length(Aidx),
breaks = seq(-2,2,0.04))$gtable,
pheatmap::pheatmap(zscore_mat(log2(matt_pc.combine[rownames(mat.test)[moduleColors.new==xxx],]+1)),cluster_rows = T, cluster_cols = F,
main = paste0(xxx,", zscore of log2(TPM +1)"),
color = color.test,
show_rownames = T, fontsize_row = font.size[xxx],
#gaps_row = length(rets$up),
#gaps_row = 40,
silent = T,
#gaps_col = length(Aidx),
breaks = seq(-2,2,0.04))$gtable,
ncol=2)
}
#gg.xxx
#gg.xxx
cowplot::plot_grid(
plotlist = gg.xxx,
ncol = 3
)
# get reordered pheatmap rownames from TPM.crt one
# see https://www.plob.org/article/16698.html
out0 <- pheatmap::pheatmap(zscore_mat(log2(matt_pc.combine.crt[rownames(mat.test)[moduleColors.new=="blue"],]+1)),cluster_rows = T, cluster_cols = F,
main = paste0(xxx,", zscore of log2(TPM.crt +1)"),
color = color.test, fontsize = 25,
show_rownames = T, fontsize_row = 0.5,
#gaps_row = length(rets$up),
#gaps_row = 40,
silent = T,
#gaps_col = length(Aidx),
breaks = seq(-2,2,0.04))
head(rownames(mat.test)[moduleColors.new=="blue"][out0$tree_row$order],50)
## [1] "Fancg" "Mdn1" "Lmbrd1" "Tmub1"
## [5] "Wdr18" "Kiz" "Tmem237" "Exoc7"
## [9] "Vps26c" "Rad9a" "Ufd1" "Zmynd8"
## [13] "Mettl6" "Ccdc66" "Mccc1" "Smagp"
## [17] "Itgb1" "Unc119b" "Slc35b4" "Nr1d2"
## [21] "Stam" "9430015G10Rik" "Rnf123" "B3galnt2"
## [25] "Reep4" "Nfs1" "Fnta" "Clec16a"
## [29] "Mllt11" "Zfp748" "Rnaseh1" "Atrip"
## [33] "Cdk5rap1" "Tnfrsf23" "Nnt" "Wdyhv1"
## [37] "Mprip" "Ncor2" "Cep120" "Mettl4"
## [41] "Idh1" "Rabgap1l" "Fnbp4" "Fpgt"
## [45] "Ssr3" "Plcg2" "Lgals8" "Rlim"
## [49] "Rel" "Arhgap30"
#save.image("I:/Shared_win/projects/20230211_microglia_module/analysis_new202310/test1.RData")